home *** CD-ROM | disk | FTP | other *** search
- ;===================================================
- ; PROGRAM DUP Version 1.0 by Dave Whitman
- ;
- ; Filter to remove adjacent duplicate lines.
- ; Reads StdIn and writes non-duplicated lines to StdOut.
- ; Duplicate lines must be adjacent to be detected.
- ;
- ; Syntax: DUP [?] [/nn] [<infile] [>outfile]
- ;
- ; The ? option prints a help message.
- ; If option /nn is used, comparision is based on
- ; the first nn characters only.
- ;
- ; Requires DOS 2.0, will abort under earlier versions.
- ;====================================================
-
- ;============
- ; Equates
- ;============
-
- @read equ 3FH ;read file/device
- @write equ 40H ;write file/device
- @dosver equ 30H ;get dos version
- @prnstr equ 09H ;print string
-
- cr equ 0DH ;carriage return character
- lf equ 0AH ;line feed character
-
- stdin equ 0000H ;standard input
- stdout equ 0001H ;standard output
- u equ 01H ;upper case option selected
-
- buf_size equ 8192 ;size of input and output buffers
-
- param_count equ [80H]
- param_area equ [81H]
- mem_avail equ [06H] ;PSP field: memory available in segment
-
- up_mask equ 11011111B ;mask for lowercase conversion (with AND)
- low_mask equ 00100000B ;mask for uppercase conversion (with OR)
-
- main proc far
- call setup ;check dos, parse options
- call process ;count w, l, c from std i/o
- int 20H ;and return to dos
- endp
-
- ;======================================
- ; SUBROUTINE SETUP
- ; Checks for proper DOS, parses options
- ;======================================
- setup proc near
-
- mov ah, @dosver ;what dos are we under?
- int 21H
- cmp al, 2 ;2.0 or over?
- jae a_mem ;yes, skip
-
- mov ah, @write ;no, bitch
- mov bx, 2 ;on stderror
- mov cx, dosend-baddos
- mov dx, offset(baddos)
- int 21H
-
- pop ax ;reset stack
- int 20H ;and exit
-
- a_mem mov ax, mem_avail ;do we have room for the buffers?
- cmp ax, (buf_size*2)+200H
- jae a_help ;yes
- mov ah, @write ;no, bitch
- mov bx, 2 ;on stderror
- mov cx, memend-nomem
- mov dx, offset(nomem)
- int 21H
- pop ax ;reset stack
- int 20H ;and exit
-
- a_help xor ch,ch ;cx <== param count
- mov cl, param_count ; "
- cmp cl, 00H ;any params?
- je aexit ;return if none
-
- mov di, offset(param_area) ;scan for help request
- mov al, '?'
- repnz ;repeat until matched or end
- scasb
- jnz a_par ;reached end, no match? skip
- mov ah, @write ;founc ?, so print help
- mov bx, 2 ;on stderror
- mov cx, helpend-help
- mov dx, offset(help)
- int 21H
- pop ax ;pop stack
- int 20H ;and exit
-
- a_par xor ch, ch ;cx <== param count
- mov cl, param_count ; "
- mov di, offset(param_area) ;scan for options
- a_loop mov al, '/' ;will be marked with /
- repnz ;repeat until matched or end
- scasb
- jnz aexit ;reached end, no match? skip
-
- xor ax,ax ;will hold building number
- jmps enter ;convert string to binary
-
- s2bin mov bl, 10 ;multiply running total by 10
- mul al,bl
- jo bad_num ;overflow? error exit
- enter xor bx,bx ;clear out top half
- mov bl, [di] ;get a digit into al
- inc di ;bump pointer
- cmp bl, ' ' ;if space, done
- je aexit
- cmp bl, '0' ;must be between 0
- jb bad_num
- cmp bl, '9' ;and 9
- ja bad_num
- sub bl, '0' ;convert to binary
- add ax, bx ;add to running total
- jo bad_num ;overflow? error exit
- loop s2bin
-
- cmp ax, 0FFH ;too long?
- jg bad_num ;abort
- mov comp_length, al ;else store converted number
- aexit
- ret ;normal return
-
- bad_num mov ah, @write ;print error message
- mov bx, 2 ;on stderror
- mov cx, numend-nummsg
- mov dx, offset(nummsg)
- int 21H ;and use default
- ret
-
- baddos db cr lf 'This program requires DOS 2.0!' cr, lf
- dosend
-
- nomem db cr lf 'Insufficient memory, program aborted' cr lf
- memend
-
- nummsg db cr lf 'Length parameter non-numeric or greater than 255'
- db cr lf
- numend
-
- help db cr lf
- db 'DUP version 1.0 by D. Whitman' cr lf
- db cr lf
- db 'Reads stdin and writes all non-duplicated lines to stdout.'
- db cr lf
- db 'Duplicates must be adjacent to be detected.' cr lf
- db 'DUP will normally be used in a pipeline, following SORT.'
- db cr lf cr lf
- db 'Syntax: DUP [?] [/nn] [<infile] [>outfile]' cr lf
- db cr lf
- db 'Options:' cr lf
- db ' ? = print this help message' cr lf
- db ' /nn = base comparision on first nn chars only' cr lf
- db cr lf
- db 'This program is in the public domain.' cr lf
- db cr lf
- helpend
- endp
-
- ;=========================================
- ; SUBROUTINE PROCESS
- ;
- ; while not(EOF) do
- ; begin
- ; get next line
- ; if curr_line <> last_line
- ; then begin
- ; write(curr_line)
- ; last_line := curr_line
- ; end
- ; end
- ;==========================================
-
- ;==================
- ; Register assignments:
- ;
- ; SI ^buf_in
- ; DI ^buf_out
- ; CX # of chars left in buf_in
- ;===================
-
- process proc near
- movw outnum, 0000H ;output buffer is empty
- mov si, offset(buf_in)
- mov di, offset(buf_out)
-
- call fillbuf ;get 1st buffer's worth
- cmp cx, 0000H ;any chars?
- je p_done ;if not, quit
- call read_line ;read 1st line
- call save_line ;save as "old_line"
- call put_line ;and output it
- ;===========
- ; Main loop
- ;===========
- p_loop call read_line ;get next line
- jc p_done ;none available? done
- call compare ;is it unique?
- jc p_loop ;if not, bit bucket, and try again
- call put_line ;if so, output it
- call save_line ;and save as new template
- jmps p_loop ;and continue til EOF
-
- p_done call dumpbuf ;flush output buffer
- ret
- endp
- ;=======================================================
- ; SUBROUTINE READLINE
- ;
- ; Reads the next line from the input buffer into string
- ; CURR_LINE. If sucessful, clears the carry flag.
- ; If not sucessful, sets the carry flag
- ;=======================================================
- read_line proc near
- push bx
- push dx
- xor dx, dx
-
- mov bx, offset(curr_line)
- call getchar
- jc r_fail
-
- r_loop mov [bx], al ;put char in string
- inc bx ;bump string pointer
- inc dl ;bump char count
- cmp al, lf ;newline?
- je r_exit ;done if so
- cmp dl, 0FFH ;string too long?
- je r_exit ;abort if so
- call getchar ;get next character
- jc r_exit ;none available? exit
- jmps r_loop ;otherwise continue
-
- r_exit mov [offset(curr_length)], dl ;save length
- pop dx
- pop bx
- clc
- ret
-
- r_fail pop dx
- pop bx
- stc
- ret
- endp
-
- ;==================================
- ; SUBROUTINE SAVE_LINE
- ;
- ; Copies CURR_LINE into LAST_LINE.
- ;==================================
- save_line proc near
- push si
- push di
- push cx
- mov si, offset(curr_line)
- mov di, offset(last_line)
- xor cx, cx
- mov cl, [offset(curr_length)]
- mov [offset(last_length)], cl
- cld ;autoincrement mode
- rep
- movsb
- pop cx
- pop di
- pop si
- ret
- endp
-
- ;==================================================
- ; SUBROUTINE COMPARE
- ;
- ; Compares CURR_LINE and LAST_LINE. If identical,
- ; sets carry flag, otherwise carry is cleared.
- ;==================================================
- compare proc near
- push si
- push di
- push cx
-
- xor cx, cx
- mov cl, [offset(curr_length)] ;set comparison length
- cmp cl, comp_length
- ja c_trunc ;longer than compare length? truncate
- jmps c_doit
- c_trunc mov cl, comp_length
-
- c_doit mov di, offset(curr_line)
- mov si, offset(last_line)
- repe ;repeat until different or end
- cmpsb
- je c_match ;matched
- clc ;not identical
- jmps c_exit
- c_match stc
- c_exit pop cx
- pop di
- pop si
- ret
- endp
-
- ;================================================
- ; SUBROUTINE PUT_LINE
- ;
- ; Moves the current line into the output buffer.
- ;================================================
- put_line proc near
- push bx
- push dx
-
- mov bx, offset(curr_line)
- xor dx, dx
- mov dl, [offset(curr_length)]
- cmp dl, 0
- je pl_done
-
- pl_loop mov al, [bx] ;get char
- call putchar ;output it
- inc bx ;bump string pointer
- dec dl ;used one char
- jnz pl_loop ;loop til done
-
- pl_done pop dx
- pop bx
- ret
- endp
-
- ;======================================================
- ; SUBROUTINE GETCHAR
- ;
- ; Trys to get a character from the input buffer.
- ; If sucessful, returns with character in AL, and carry
- ; flag clear. If unsucessful, sets carry flag.
- ;======================================================
- getchar proc near
- cmp cx, 0000 ;is the buffer empty?
- jne g1 ;nope, skip
- call fillbuf ;if so, try to refill it
- cmp cx, 0000 ;still empty?
- je g_abort ;then return failure
-
- g1 lodsb ;get character from [si]
- dec cx ;used up one char
- clc ;clear flag to indicate sucess
- ret ;and return
-
- g_abort stc ;set flag for failure
- ret
- endp
-
- ;======================================================
- ; SUBROUTINE FILLBUF
- ;
- ; Fills the input buffer from StdIn. The number of
- ; available characters is stored in CX, and SI is reset
- ; to the beginning of the buffer.
- ;======================================================
- fillbuf proc near
- push bx
- push dx
- mov ah, @read ;read
- mov bx, stdin ;from stdin
- mov cx, buf_size ;one buffer's worth
- mov dx, offset(buf_in) ;into the input buffer
- int 21H
- mov cx, ax ;save number of chars read
- mov si, offset(buf_in) ;reset buffer
- pop dx
- pop bx
- ret
- endp
-
- ;===================================================
- ; SUBROUTINE PUTCHAR
- ;
- ; Moves the character in AL into the output buffer.
- ; If the buffer is now full, it is dumped to disk.
- ;===================================================
-
- putchar proc near
- stosb ;move character into buffer
- incw outnum ;bump count of chars in buffer
- cmpw outnum, buf_size ;is buffer full?
- jl pu_exit ;no, skip
- call dumpbuf ;yes, dump buffer to disk
- pu_exit ret
- endp
-
- ;==================================================
- ; SUBROUTINE DUMPBUF
- ;
- ; Dumps the output buffer to StdOut.
- ;==================================================
- dumpbuf proc near
- push ax ;save active registers
- push bx ; " " "
- push cx ; " " "
- push dx ; " " "
- mov ah, @write ;write
- mov bx, stdout ;to stdout
- mov cx, outnum ;number of chars for output
- mov dx, offset(buf_out) ;from output buffer
- int 21H
- movw outnum, 0 ;reset buffer
- mov di, offset(buf_out) ; " "
- pop dx ;restore active registers
- pop cx ; " " "
- pop bx ; " " "
- pop ax ; " " "
- ret
- endp
- ;=====================================================
- ;BUFFERS
- ;
- ; No space is actually allocated for the buffers.
- ; At run time, the program checks to ensure there
- ; is suffcient free memory, then uses the memory
- ; immediately after itself for buffers.
- ;
- ; This stratagy minimizes the size of the object file,
- ; and lets the program load quicker.
- ;======================================================
-
- outnum dw 0000H
- comp_length db 0FFH
-
- last_length
- org offset($+1)
- last_line
- org offset($+0FFH)
- curr_length
- org offset($+1)
- curr_line
- org offset($+0FFH)
- buf_in
- org offset($+buf_size)
- buf_out